# Crawler for textbook audio recordings.
# Requires the requests library; install it with: pip3 install requests
import requests
# Requires BeautifulSoup; install it with: pip3 install bs4 (the 'lxml' parser this script uses also needs: pip3 install lxml)
from bs4 import BeautifulSoup
# http://www.xdstudy.com/h5/follow/93/detail/
# http://www.xdstudy.com/h5/follow/2/detail/
# http://www.xdstudy.com/h5/follow/79/detail/
# Every lesson page lives at <a><lesson id><b>, for example
#   http://www.xdstudy.com/h5/follow/2/detail/
a = "http://www.xdstudy.com/h5/follow/"
b = "/detail/"
# One detail-page URL per lesson id from 2 through 93 (inclusive).
urlList = ["{}{}{}".format(a, lesson_id, b) for lesson_id in range(2, 94)]
# Seconds to wait on a single page before giving up, so that one
# unresponsive request cannot hang the whole crawl.
REQUEST_TIMEOUT = 10


def _lesson_id(url):
    """Return the lesson id segment of a ".../follow/<id>/detail/" URL."""
    # The URL ends with "/", so the split ends with [..., "<id>", "detail", ""].
    return url.split("/")[-3]


def _fetch_html(session, url):
    """Download *url* and return its body as text, or None on failure.

    Connection errors, timeouts and HTTP error statuses are reported on
    stderr instead of being raised, so a single bad page does not abort
    the remaining downloads.
    """
    import sys  # local import: only needed for the error report

    try:
        response = session.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print("skipping", url, "-", err, file=sys.stderr)
        return None
    # Decode as UTF-8; undecodable bytes are replaced rather than raising.
    return response.content.decode("utf-8", errors="replace")


def _audio_sources(html):
    """Return the ``src`` of each <audio> tag inside a ``div.top`` element."""
    soup = BeautifulSoup(html, 'lxml')
    sources = []
    for container in soup.find_all("div", class_="top"):
        # src=True skips <audio> tags that have no src attribute, which
        # would otherwise raise KeyError on the lookup below.
        for audio in container.find_all("audio", src=True):
            sources.append(audio["src"])
    return sources


# Maps lesson id (as a string) -> list of audio src values found on that page.
imgObj = {}
# A Session pools connections, so the requests to the same host share them.
with requests.Session() as session:
    for u in urlList:
        html = _fetch_html(session, u)
        # Failed pages still get an (empty) entry so every lesson id is present.
        imgObj[_lesson_id(u)] = _audio_sources(html) if html is not None else []
print(imgObj)
















